# Python script to analyse directional RNA-seq data.
# Usage: python dirRNAseqAnalyse.py path_to_data/

import os
import subprocess
import sys

import readCollapser2

# GO THROUGH THE RAW .FASTQ FILES
# Every file found under the given directory whose name contains '.fastq'
# is adaptor-trimmed with cutadapt and then handed to readCollapser2.
# All output files and the log are written to the CURRENT WORKING DIRECTORY,
# so cd to the desired output location before running this script.
if len(sys.argv) < 2:
    sys.exit('Usage: python dirRNAseqAnalyse.py path_to_data/')
path = sys.argv[1]
log = 'dirRNAseqAnalyseLog.txt'

## TRIMMING PARAMETERS (the same for every file, so they are set once)
# Both length limits are passed to cutadapt with 6 added (-m 25, -M 75).
# NOTE(review): the +6 presumably accounts for the 6 nt that the collapse
# step removes from the 3' end -- confirm against readCollapser2.
minimumReadLength = 19
# The original note says 64 is the usual value (giving -M 70, i.e. cutadapt
# must trim at least 5 bases from a 75-nt read); 69 gives -M 75.
maximumReadLength = 69
adaptorSeq = 'AGATCGGAAGAGCACACGTCTGAACTCCAGTCAC'

for (dirPath, dirNames, fileNames) in os.walk(path):
    for fileName in fileNames:
        # Substring match, so compressed names such as x.fastq.gz also pass.
        if '.fastq' not in fileName:
            continue
        # Join with the directory os.walk is currently visiting: this works
        # for files in subdirectories and whether or not `path` ends in '/'.
        fastqFile = os.path.join(dirPath, fileName)
        outPrefix = fileName.strip().split('_')[0]
        trimmedFile = outPrefix+'.trimmed'

        ## TRIM READS
        print('Processing '+fileName+' with cutadapt to remove adaptors')
        # An argument list (no shell) keeps file names containing spaces or
        # shell metacharacters from breaking or altering the command.
        cutadaptCmd = ['cutadapt',
                       '-a', adaptorSeq,
                       '-m', str(minimumReadLength+6),
                       '-M', str(maximumReadLength+6),
                       '--too-short-output', outPrefix+'.trimmed.tooShort',
                       '--too-long-output', outPrefix+'.trimmed.tooLong',
                       fastqFile]
        # cutadapt's stdout (the trimmed reads) overwrites trimmedFile;
        # its stderr is appended to the shared log file.
        with open(trimmedFile, 'w') as trimmedOut, open(log, 'a') as logOut:
            try:
                status = subprocess.call(cutadaptCmd, stdout=trimmedOut, stderr=logOut)
            except OSError as err:
                # Raised when the cutadapt executable cannot be started.
                print('Could not run cutadapt: %s'%err)
                status = -1
        if status != 0:
            # Do not collapse an empty or partial .trimmed file.
            print('cutadapt failed on '+fileName+' (exit status %s); see %s. Skipping.'%(status, log))
            continue

        ## COLLAPSE READS AND TRIM OFF 6 nt FROM 3' END
        print('Processing '+trimmedFile+" with readCollapser2 to remove PCR bias and trim 6 Ns from 3' end")
        readCollapser2.main([trimmedFile, outPrefix+'.trimmed.collapsed.fastq'])
